* This do file creates the output used in Tables 1, 2, A4, and A5


* ---------------------------------------------------------------------------
* Session setup: close any open log, reset Stata, and define directory globals
* ---------------------------------------------------------------------------
capture log close
clear all
set more off

* Root of the replication package. Edit this one line to point at the directory
* where the files are stored on your computer; the other globals derive from it.
global path "I:/DataSets5/Duncan/Dropbox/Recent Minimum Wage Changes/2020.12 NBER Update/JOLE Precommittment Replication"

* Sub-directories of the replication package
global dtadir "$path/Data"
global tabdir "$path/Tables"
global figdir "$path/Figures"
global estdir "$path/Estimates"
global logdir "$path/Logfiles"

* Open the log for this do file (overwrites any previous log of the same name)
log using "$logdir/SummaryStatsStandard_ACS_CEPCategories.log", replace


*** Assemble relevant years of ACS 
use "${path}/Data/ACS-2019.dta", clear

* Keep survey years 2011 onward
drop if year < 2011

*** Restrict the sample to ages 16-64 (drops seniors and those under 16)
drop if age >= 65 | age < 16
* Drop records with empstat == 0
* NOTE(review): presumably the not-applicable code in this extract -- confirm against the codebook
drop if empstat == 0

*** Construct economic outcomes of interest 

* Employment indicator: 1 when empstat == 1, 0 otherwise
gen employed = 0
replace employed = 1 if  empstat == 1

**** Construct education variables
* Four mutually exclusive indicators built from the educ code:
*   dropout (educ < 6), highschool (educ == 6),
*   somecollege (7 <= educ < 10), collegeplus (educ >= 10)

gen dropout = 0 
replace dropout = 1 if educ < 6
gen highschool = 0 
replace highschool = 1 if educ == 6
gen somecollege = 0
replace somecollege = 1 if educ >= 7 & educ < 10
gen collegeplus = 0
replace collegeplus = 1 if educ >= 10

**** Construct full vs part-time variable
* Part-time means usual weekly hours below 40. The earlier condition
* (uhrswork >= 40) flagged full-time workers instead, and also flagged
* missing hours because Stata treats missing as larger than any number.
* NOTE(review): records with uhrswork == 0 are flagged as part-time here;
* confirm whether zero-hour records should be excluded.

gen parttime = 0
replace parttime  = 1 if uhrswork < 40

** creates mid-skill employment rate
* group == 1 for high school graduates aged 22-30 and for dropouts aged 31-64
* (the last two clauses jointly cover dropouts aged 31-64)
gen group = 0
replace group = 1 if (age <= 30 & age > 21 & highschool == 1) | (age > 30 & age <= 45 & dropout == 1) | (age > 45 & age < 65 & dropout == 1)

* Unweighted mean of employed within the group, by state and year;
* the second egen copies that value onto every row of the state-year
egen stateempD = mean(employed) if group == 1, by(year statefip)
egen stateempE = max(stateempD), by(year statefip)

* Convert statefip to numeric if it is stored as a string
* (presumably so it matches the key type in the merge files -- TODO confirm)
destring statefip, replace
***
* Assign the policy categories
***

* Attach state-level minimum wage variables to every person record.
* Using-only rows (states with no ACS observations) are dropped so that no
* empty records are appended, matching the handling of the HPI merge below.
merge m:1 statefip using "${path}/Data/min_wage_variables_for_ACS_and_CPS_analysis.dta"
drop if _merge == 2
drop _merge 

* Indexer: states whose originaltype is "Indexer"
gen indexer = 0
replace indexer = 1 if originaltype == "Indexer"

* Large statutory increaser: non-indexer whose January minimum wage rose by at
* least 1 dollar between 2013 and 2015.
* NOTE(review): the missing-value guard tests jan2017min rather than
* jan2015min; a missing jan2015min would satisfy the >= 1 comparison. Confirm
* that this is intended.
gen StatIncreaserLarge = 0
replace StatIncreaserLarge = 1 if indexer == 0 & (jan2015min - jan2013min) >= 1 & (jan2017min - jan2013min) != .

* Any statutory increase between January 2013 and January 2016 (non-indexers)
gen statutoryincreasein2014or2015 = 0
replace statutoryincreasein2014or2015 = 1 if (jan2016min - jan2013min) > 0 & indexer == 0

* Small statutory increaser: a statutory increaser that is not a large one
gen StatIncreaserSmall = 0
replace StatIncreaserSmall = 1 if indexer == 0 & statutoryincreasein2014or2015 == 1 & StatIncreaserLarge == 0

* Timing of the first change among non-indexers, from the changesbyYYYY flags
gen firstchangebyjan14 = 0
replace firstchangebyjan14 = 1 if changesby2014 == 1 & indexer == 0

gen firstchangejan14to15 = 0
replace firstchangejan14to15 = 1 if changesby2015 == 1 & changesby2014 == 0 & indexer == 0

gen firstchangejan15to16 = 0
replace firstchangejan15to16 = 1 if changesby2016 == 1 & changesby2015 == 0 & changesby2014 == 0 & indexer == 0

* Merge HPI and PersonalIncome Data
merge m:1 statefip year using "${path}/Data/HPI_acs_2019.dta"
drop if _merge == 2

drop _merge

* The person weight is referred to as cmpwgt in the rest of the file
rename perwt cmpwgt

* Using-only state-years are dropped here as well, consistent with the HPI merge
merge m:1 statefip year using "${path}/Data/PersonalIncome_acs_2019.dta"
drop if _merge == 2
* Rescale to thousands (see the variable label assigned later)
replace PersonalIncome = PersonalIncome/1000
drop _merge

**
* CHECK STATE POLICY CATEGORIES 
**

* List the states that fall into each policy category
tab statefip if StatIncreaserLarge == 1
tab statefip if StatIncreaserSmall == 1
tab statefip if indexer == 1

* Indicator for race == 2 (labelled "Black" below)
gen black = 0 
replace black = 1 if race == 2


* Generate January minimum wage variable
* For each survey year, take the January minimum wage column of that year
gen EffectiveMinimumWage =.
forvalues i=2011/2019 {
	replace EffectiveMinimumWage = jan`i'min if year == `i'	
}

* Drop unneeded variables and compress data to speed estimation
* drop is all-or-nothing: if any one name in a varlist is absent the whole
* command fails and, under capture, nothing is dropped. Dropping one variable
* at a time removes every listed variable that exists and skips the rest.
foreach v in datanum serial cbserial hhwt cluster strata gq pernum originaltype jan2011min jan2012min jan2013min jan2014min statincreasebyjan2014 jan2015min statincreasebyjan2015 jan2016min ///
statincreasebyjan2016 jan2017min jan2018min change2014 change2015 change2016 change2017 change2018 changesby2014 changesby2015 changesby2016 changesby2017 ///
changesby2018 {
	capture drop `v'
}

compress

* Policy category code used to group the tables:
*   1 = no change, 2 = indexer, 3 = small statutory increaser, 4 = large statutory increaser
gen type = .
replace type = 1 if indexer == 0 &  StatIncreaserSmall == 0 & StatIncreaserLarge == 0
replace type = 2 if indexer == 1
replace type = 3 if StatIncreaserSmall == 1
replace type = 4 if StatIncreaserLarge == 1

* Any state in categories 2-4
gen anychange = 0
replace anychange = 1 if inrange(type,2,4)

*** Make Tables: Only 2019 as Post Period

* Period "A" = 2011-2013, "B" = 2019; all other years keep the empty string
gen Period = ""
replace Period = "A" if year >= 2011 & year <= 2013 
replace Period = "B" if year == 2019



label var black "Black"
label var HPI "House Price Index (1000s)"
label var PersonalIncome "Income Per Capita (1000s)"
label var employed "Employment"
label var highschool "High School Degree"
label var somecollege "Some College Education"
label var EffectiveMinimumWage "Effective Minimum Wage"
label var age "Age"

* Subgroup-specific copies of the outcomes. Each is missing outside its
* subgroup, so a later mean is taken over that subgroup only.
gen HPI_young = HPI if age <= 21
gen PersonalIncome_young = PersonalIncome if age <= 21
gen employed_young = employed if age <= 21
gen employed_lowskill = employed if dropout == 1 & age <= 25
gen employed_primeage = employed if age >= 26 & age <= 54
gen employed_midskill = employed if group == 1


*** Table 2: Unadjusted Differences Using ACS Data and $1 Cutoff and 2019 Post Period
* Builds a table of weighted subgroup means by policy category for 2011-2013
* and 2019, the change between them, and the change relative to the
* no-change category, then exports it to Excel. The person-level data are
* preserved before the collapse and restored afterwards.
preserve

* post = 0 for 2011-2013, 1 for 2019, missing for all other years
gen post = 0 if year >= 2011 & year <= 2013
replace post = 1 if year == 2019

* Weighted means by policy category and period
collapse employed_lowskill employed_young employed_primeage employed_midskill HPI_young PersonalIncome_young [aw=cmpwgt], by(type post)

* Round averages
* (the differences below are computed from these rounded values)
foreach var of varlist employed_lowskill employed_young employed_primeage employed_midskill {
	replace `var' = round(`var', .001)
}

replace HPI_young = round(HPI_young, .1)
replace PersonalIncome_young = round(PersonalIncome_young, .01)

* Prefix every variable containing an underscore with v_ so the outcomes share
* a common stub for reshape (type and post have no underscore and are untouched)
rename *_* v_*_*

* Years outside both periods were collapsed into post == . ; discard them
drop if post ==.
* Long by outcome, then wide by period: one row per (type, outcome)
reshape long v,  i(type post) j(var, string)
reshape wide v, i(type var) j(post)

rename v0 avg_2011_2013
rename v1 avg_2019

* Pre/post change, and the change in the no-change category (type == 1)
* copied onto every row of the same outcome
gen pre_post_diff = avg_2019 - avg_2011_2013
gen pre_post_diff_nc = pre_post_diff if type == 1
bysort var: egen pre_post_diff_nochange = max(pre_post_diff_nc)

gen change_nc = pre_post_diff - pre_post_diff_nochange

drop pre_post_diff_nc pre_post_diff_nochange

* The no-change category has no change relative to itself to report
replace change_nc = . if type == 1

replace var = "_HPI" if var == "_HPI_young"
replace var = "_PersonalIncome" if var == "_PersonalIncome_young"

* variable to order results
* (varnum stays 1 for _employed_lowskill)
gen varnum = 1
replace varnum = 2 if var == "_employed_young"
replace varnum = 3 if var == "_employed_primeage"
replace varnum = 4 if var == "_employed_midskill"
replace varnum = 5 if var == "_HPI"
replace varnum = 6 if var == "_PersonalIncome"

sort varnum type

gen typestr = ""

* A dollar sign followed by a name inside double quotes is expanded as a
* global macro, which would strip the amount from the label. char(36) inserts
* a literal dollar sign instead.
replace typestr = "No Change" if type == 1
replace typestr = "Indexers" if type == 2
replace typestr = "Increasers < " + char(36) + "1" if type == 3
replace typestr = "Increasers >= " + char(36) + "1" if type == 4

order var typestr, first

drop type varnum

label var var "Variable"
label var typestr "Policy Category"
label var avg_2011_2013 "2011-2013"
label var avg_2019 "2019"
label var pre_post_diff "Change"
label var change_nc "Change Relative to Non-Increasers"

export excel using "$tabdir/unadjusted_differences_acs_cep_2019.xlsx", firstrow(varlabels) keepcellfmt replace

restore
 
*** Table A1: Sample Summary Statistics: ACS and Supplemental Data for 2011-2013 and 2019
* Weighted means and standard deviations by Period, first for dropouts aged 25
* or younger, then for everyone aged 21 or younger.
* Observations with an empty Period (years outside 2011-2013 and 2019) are
* excluded; otherwise bysort treats the empty string as its own group and
* stores an extra, unlabeled set of estimates for years the table does not cover.
bysort Period: eststo: estpost tabstat employed age black highschool somecollege HPI PersonalIncome EffectiveMinimumWage if dropout == 1 & age <= 25 & Period != "" [aw = cmpwgt], ///
	columns(statistics) statistics(mean sd)
bysort Period: eststo: estpost tabstat employed age black highschool somecollege HPI PersonalIncome EffectiveMinimumWage if age <= 21 & Period != "" [aw = cmpwgt], ///
	columns(statistics) statistics(mean sd)
* Write all stored estimates (means, with sd as the auxiliary statistic) to CSV
esttab using "$tabdir/summary_stats_acs_2019.csv", replace main(mean) aux(sd) label nomtitle nolegend nonotes
estimates clear

* Redefine Period for the 2015-2019 post period used by the remaining tables:
* "A" = 2011-2013, "B" = 2015-2019; 2014 keeps the empty string
cap drop Period 
gen Period = ""
replace Period = "A" if year >= 2011 & year <= 2013 
replace Period = "B" if year >= 2015 & year <= 2019



*** Table A5: Unadjusted Differences Using ACS Data and $1 Cutoff and 2015-2019 Post Period
* Builds a table of weighted subgroup means by policy category for 2011-2013
* and 2015-2019, the change between them, and the change relative to the
* no-change category, then exports it to Excel. The person-level data are
* preserved before the collapse and restored afterwards.
preserve

* post = 0 for 2011-2013, 1 for 2015-2019, missing for 2014
gen post = 0 if year >= 2011 & year <= 2013
replace post = 1 if year >= 2015 & year <= 2019

* Weighted means by policy category and period
collapse employed_lowskill employed_young employed_primeage employed_midskill HPI_young PersonalIncome_young [aw=cmpwgt], by(type post)

* Round averages
* (the differences below are computed from these rounded values)
foreach var of varlist employed_lowskill employed_young employed_primeage employed_midskill {
	replace `var' = round(`var', .001)
}

replace HPI_young = round(HPI_young, .1)
replace PersonalIncome_young = round(PersonalIncome_young, .01)

* Prefix every variable containing an underscore with v_ so the outcomes share
* a common stub for reshape (type and post have no underscore and are untouched)
rename *_* v_*_*

* Years outside both periods were collapsed into post == . ; discard them
drop if post ==.
* Long by outcome, then wide by period: one row per (type, outcome)
reshape long v,  i(type post) j(var, string)
reshape wide v, i(type var) j(post)

rename v0 avg_2011_2013
rename v1 avg_2015_2019

* Pre/post change, and the change in the no-change category (type == 1)
* copied onto every row of the same outcome
gen pre_post_diff = avg_2015_2019 - avg_2011_2013
gen pre_post_diff_nc = pre_post_diff if type == 1
bysort var: egen pre_post_diff_nochange = max(pre_post_diff_nc)

gen change_nc = pre_post_diff - pre_post_diff_nochange

drop pre_post_diff_nc pre_post_diff_nochange

* The no-change category has no change relative to itself to report
replace change_nc = . if type == 1

replace var = "_HPI" if var == "_HPI_young"
replace var = "_PersonalIncome" if var == "_PersonalIncome_young"

* variable to order results
* (varnum stays 1 for _employed_lowskill)
gen varnum = 1
replace varnum = 2 if var == "_employed_young"
replace varnum = 3 if var == "_employed_primeage"
replace varnum = 4 if var == "_employed_midskill"
replace varnum = 5 if var == "_HPI"
replace varnum = 6 if var == "_PersonalIncome"

sort varnum type

gen typestr = ""

* A dollar sign followed by a name inside double quotes is expanded as a
* global macro, which would strip the amount from the label. char(36) inserts
* a literal dollar sign instead.
replace typestr = "No Change" if type == 1
replace typestr = "Indexers" if type == 2
replace typestr = "Increasers < " + char(36) + "1" if type == 3
replace typestr = "Increasers >= " + char(36) + "1" if type == 4

order var typestr, first

drop type varnum

label var var "Variable"
label var typestr "Policy Category"
label var avg_2011_2013 "2011-2013"
label var avg_2015_2019 "2015-2019"
label var pre_post_diff "Change"
label var change_nc "Change Relative to Non-Increasers"

export excel using "$tabdir/unadjusted_differences_acs_cep_20152019.xlsx", firstrow(varlabels) keepcellfmt replace

restore

*** Table 1: Sample Summary Statistics: ACS and Supplemental Data for 2011-2013 and 2015-2019
* Weighted means and standard deviations by Period, first for dropouts aged 25
* or younger, then for everyone aged 21 or younger.
* Observations with an empty Period (2014) are excluded; otherwise bysort
* treats the empty string as its own group and stores an extra, unlabeled set
* of estimates for a year the table does not cover.
bysort Period: eststo: estpost tabstat employed age black highschool somecollege HPI PersonalIncome EffectiveMinimumWage if dropout == 1 & age <= 25 & Period != "" [aw = cmpwgt], ///
	columns(statistics) statistics(mean sd)
bysort Period: eststo: estpost tabstat employed age black highschool somecollege HPI PersonalIncome EffectiveMinimumWage if age <= 21 & Period != "" [aw = cmpwgt], ///
	columns(statistics) statistics(mean sd)
* Write all stored estimates (means, with sd as the auxiliary statistic) to CSV
esttab using "$tabdir/summary_stats_acs_20152019.csv", replace main(mean) aux(sd) label nomtitle nolegend nonotes
estimates clear

* Close the log and end the Stata session, discarding the data in memory
log close
exit, clear
